* Start from an empty dataset and switch off output paging
clear
set more off

* 1936 consumption survey: build the extract that is appended to the Census rows
    use "$Survey1936/output/ConsumptionSurvey_1936_foranalysis.dta", clear

    * Only these four variables are carried forward
    keep occ1950ej race south_merge tot_fam_inc

    * Give occupation and income the names they must have when the
    * files are stacked (fatheroccej, hh_income_fix)
    rename occ1950ej fatheroccej
    rename tot_fam_inc hh_income_fix

    * Retain only occupation codes 21 and 81; every other row is discarded
    keep if inlist(fatheroccej, 21, 81)

    * Park the extract in a temporary file until the append step
    tempfile survey36
    save `survey36'

* 1940 Census fathers file: load, restrict to complete cases, build income variables
    use "$CensusData/input/Census1940_fathers_ages30to50.dta", clear

    * Occupation code 99 is excluded
    * NOTE(review): presumably an unclassified/unknown code -- confirm against the codebook
    drop if fatheroccej==99

    * Complete cases only: discard a row when any of the four grouping
    * variables is system missing (.)
    drop if fatheroccej==. | race==. | south_merge==. | edu==.

    keep fatheroccej race south_merge hh_income edu

    * Source flag: 1 marks rows that come from the Census file
    gen census = 1

    * Occupation codes 21 and 81 are the ones whose income is supplied by the
    * 1936 survey rows instead; flag them and leave hh_income_fix missing there
    gen problem_occs = inlist(fatheroccej, 21, 81)
    gen hh_income_fix = hh_income if !problem_occs
    
* Stack the 1936 survey extract underneath the Census rows
    append using `survey36'

    * Appended survey rows arrive with census missing; code them as 0
    replace census = 0 if missing(census)

    * Cell identifiers for each grouping used in the regressions below.
    * egen group() (without the missing option) leaves the id missing when any
    * component is missing, so fourplet is missing for the survey rows, which
    * carry no edu variable.
    egen doublet     = group(fatheroccej race)
    egen triplet     = group(fatheroccej race south_merge)
    egen fourplet    = group(fatheroccej race south_merge edu)
    egen doublet_alt = group(race south_merge)
    
* F-statistic from our main regression
*   hh_income_fix is regressed on a full set of triplet
*   (fatheroccej x race x south_merge) cell indicators. Factor-variable
*   notation is used, as in the R-squared regressions further down, instead of
*   materialising one dummy variable per cell with -tab, gen()-: the model F
*   and the estimation sample are the same (one cell is the omitted base either
*   way), but no triplet_* columns are added to the dataset.
*   NOTE(review): hh_income_fix is still in levels at this point; it is only
*   log-transformed later in the file. Confirm that the F-statistic is meant
*   to come from the levels regression.
    regress hh_income_fix i.triplet

    * Read e(F) directly and keep it in double precision rather than passing
    * it through a macro into a float
    gen double fstat = e(F)
    
    
* Placeholders for the R-squared values: est_1 ... est_6, all missing to start.
* The regressions below fill them in on the first two rows only.
    foreach k of numlist 1/6 {
        generate est_`k' = .
    }
    
* R-squared by grouping using 1940 Census income only (results go in row 1)
*   hh_income is missing on the appended survey rows, so these regressions
*   run on Census rows only.

    * Switch hh_income to natural logs (ln() and log() are the same function)
    replace hh_income = ln(hh_income)

    * One regression per grouping; the position of the grouping in the list is
    * the suffix of the est_ variable that receives its R-squared:
    *   1 fatheroccej  2 race  3 doublet  4 doublet_alt  5 triplet  6 fourplet
    local col = 0
    foreach grp in fatheroccej race doublet doublet_alt triplet fourplet {
        local ++col
        quietly regress hh_income i.`grp'
        replace est_`col' = `e(r2)' in 1
        * Echo the estimation sample size to the log
        display `e(N)'
    }
    
* R-squared by grouping using the income series with the 1936 fix (results go in row 2)
*   hh_income_fix is Census income outside occupation codes 21/81 and
*   1936 survey income for the appended survey rows.

    * Switch hh_income_fix to natural logs (ln() and log() are the same function)
    replace hh_income_fix = ln(hh_income_fix)

    * Same groupings and est_ numbering as the Census-only block, except that
    * fourplet is not run here, so est_6 stays missing in row 2.
    local col = 0
    foreach grp in fatheroccej race doublet doublet_alt triplet {
        local ++col
        quietly regress hh_income_fix i.`grp'
        replace est_`col' = `e(r2)' in 2
        * Echo the estimation sample size to the log
        display `e(N)'
    }

    * Reduce the dataset to the two rows that hold the results. Rows are picked
    * by position rather than by est_1 being non-missing, so a missing
    * R-squared can never shift a row and attach the wrong label to it.
    keep in 1/2

    * Label the rows: row 1 = Census-only estimates, row 2 = estimates with the 1936 fix
    gen desc = "1940 Census" in 1
    replace desc = "1940, 1936 fix" in 2

    * Keep only the result columns; every other variable is leftover
    * person-level data that happens to sit on the first two rows
    keep desc est_* fstat
    order desc est_* fstat

    compress
    save "$Mydirectory2/appendix_a/R2_results.dta", replace